Import data

# SE-6 ratings. IDs are lower-cased *before* sorting so that the sort order is
# based on the same key that is used for matching.
se6 <- readr::read_rds('./data/se6.rds') %>%
    mutate(ID = stringr::str_to_lower(ID)) %>%
    arrange(ID)

# For site and group info
foo <- readr::read_rds('./data/demographics.rds') %>%
    select(ID, Site, Group, Sex) %>%
    rename(ID2 = ID) %>%
    arrange(ID2)

# Check for mismatches: number of IDs that occur in only one of the two tables
n_unmatched <- nrow(anti_join(foo, se6, by = c('ID2' = 'ID'))) +
    nrow(anti_join(se6, foo, by = c('ID' = 'ID2')))
n_unmatched

# Join the two datasets on the ID key rather than by row position, so a
# difference in row order or row count cannot attach ratings to the wrong
# participant. Every participant in the demographics table is kept; ratings
# are NA for any ID missing from the SE-6 table.
se6 <- foo %>%
    left_join(se6, by = c('ID2' = 'ID')) %>%
    rename(ID = ID2)

Quick look

# Column types and first few values of the joined data
se6 %>% glimpse()
## Observations: 160
## Variables: 40
## $ ID               <chr> "j1", "j10", "j11", "j12", "j17", "j18", "j19...
## $ Site             <chr> "u1", "u1", "u1", "u1", "u1", "u1", "u1", "u1...
## $ Group            <chr> "p", "t", "p", "t", "t", "t", "t", "p", "t", ...
## $ Sex              <chr> "female", "male", "female", "female", "female...
## $ Fatigue_BL       <dbl> 5, 9, 8, 8, 7, NA, 7, 7, 7, 6, NA, NA, 8, 7, ...
## $ Fatigue_Wk4      <dbl> NA, 9, 8, NA, NA, NA, 9, NA, 7, 8, NA, NA, NA...
## $ Fatigue_Wk8      <dbl> NA, 9, 10, 8, NA, NA, 9, 8, 7, 8, NA, NA, NA,...
## $ Fatigue_Wk12     <dbl> NA, 10, 7, 8, 5, NA, NA, NA, 7, 8, NA, NA, NA...
## $ Fatigue_Wk24     <dbl> NA, 10, NA, 8, NA, NA, NA, 6, 8, 8, NA, NA, N...
## $ Fatigue_Wk48     <dbl> NA, 7, NA, 6, NA, NA, NA, 8, NA, 9, NA, NA, N...
## $ Discomf_BL       <dbl> 5, 9, 10, 8, 8, NA, 7, 6, 6, 6, NA, NA, 9, 9,...
## $ Discomf_Wk4      <dbl> NA, 9, 10, NA, NA, NA, 9, NA, 7, 8, NA, NA, N...
## $ Discomf_Wk8      <dbl> NA, 9, 10, 9, NA, NA, 9, 8, 7, 8, NA, NA, NA,...
## $ Discomf_Wk12     <dbl> NA, 10, 10, 9, 5, NA, NA, NA, 9, 8, NA, NA, N...
## $ Discomf_Wk24     <dbl> NA, 10, NA, 8, NA, NA, NA, 7, 8, 8, NA, NA, N...
## $ Discomf_Wk48     <dbl> NA, 8, NA, 8, NA, NA, NA, 8, NA, 9, NA, NA, N...
## $ Distress_BL      <dbl> 5, 10, 10, 9, 10, NA, 9, 9, 5, 8, NA, NA, 9, ...
## $ Distress_Wk4     <dbl> NA, 9, 10, NA, NA, NA, 10, NA, 9, 9, NA, NA, ...
## $ Distress_Wk8     <dbl> NA, 9, 10, 9, NA, NA, 9, 7, 7, 8, NA, NA, NA,...
## $ Distress_Wk12    <dbl> NA, 10, 8, 9, 6, NA, NA, NA, 9, 9, NA, NA, NA...
## $ Distress_Wk24    <dbl> NA, 10, NA, 8, NA, NA, NA, 8, 9, 9, NA, NA, N...
## $ Distress_Wk48    <dbl> NA, 8, NA, 7, NA, NA, NA, 8, NA, 9, NA, NA, N...
## $ Other_sympt_BL   <dbl> 1, 10, 10, 9, 10, NA, 8, 9, 7, 8, NA, NA, NA,...
## $ Other_sympt_Wk4  <dbl> NA, 9, 10, NA, NA, NA, 10, NA, 9, 9, NA, NA, ...
## $ Other_sympt_Wk8  <dbl> NA, 9, 10, 9, NA, NA, 9, 8, 8, 10, NA, NA, NA...
## $ Other_sympt_Wk12 <dbl> NA, 10, 10, 10, 5, NA, NA, NA, 10, 9, NA, NA,...
## $ Other_sympt_Wk24 <dbl> NA, 10, NA, 9, NA, NA, NA, 8, 9, 9, NA, NA, N...
## $ Other_sympt_Wk48 <dbl> NA, 8, NA, 8, NA, NA, NA, 8, NA, 9, NA, NA, N...
## $ Tasks_BL         <dbl> 5, 10, 10, 10, 9, NA, 10, 8, 7, 8, NA, NA, 9,...
## $ Tasks_Wk4        <dbl> NA, 10, 10, NA, NA, NA, 10, NA, 10, 9, NA, NA...
## $ Tasks_Wk8        <dbl> NA, 10, 10, 10, NA, NA, 10, 8, 9, 10, NA, NA,...
## $ Tasks_Wk12       <dbl> NA, 10, 10, 10, 4, NA, NA, NA, 10, 9, NA, NA,...
## $ Tasks_Wk24       <dbl> NA, 10, NA, 9, NA, NA, NA, 9, 10, 10, NA, NA,...
## $ Tasks_Wk48       <dbl> NA, 8, NA, 9, NA, NA, NA, 8, NA, 9, NA, NA, N...
## $ Non_drug_BL      <dbl> 5, 10, 10, 10, 9, NA, 10, 8, 7, 9, NA, NA, 10...
## $ Non_drug_Wk4     <dbl> NA, 10, 10, NA, NA, NA, 10, NA, 10, 9, NA, NA...
## $ Non_drug_Wk8     <dbl> NA, 10, 10, 10, NA, NA, 10, 8, 9, 10, NA, NA,...
## $ Non_drug_Wk12    <dbl> NA, 10, 10, 10, 5, NA, NA, NA, 10, 9, NA, NA,...
## $ Non_drug_Wk24    <dbl> NA, 10, NA, 10, NA, NA, NA, 9, 10, 10, NA, NA...
## $ Non_drug_Wk48    <dbl> NA, 6, NA, 9, NA, NA, NA, 8, NA, 7, NA, NA, N...
# First and last rows of the joined data
se6 %>% head()
se6 %>% tail()

Clean data

# Calculate mean score for each time point.
# Item columns are named '<Domain>_<time point>', so they are picked by their
# exact suffix. A substring match on 'Wk4' also matches the '*_Wk48' columns,
# which would mix week-48 ratings into the week-4 mean.
time_points <- c('BL', 'Wk4', 'Wk8', 'Wk12', 'Wk24', 'Wk48')

for (time_point in time_points) {
    item_cols <- grep(paste0('_', time_point, '$'), names(se6), value = TRUE)
    # Leave out any 'mean_*' column from an earlier run of this chunk
    item_cols <- item_cols[!startsWith(item_cols, 'mean_')]
    # rowMeans() gives NaN for a participant with no ratings at this time point
    se6[[paste0('mean_', time_point)]] <- rowMeans(se6[item_cols],
                                                   na.rm = TRUE)
}

# Check
# NOTE: output rendered before this fix shows week-4 means that still include
# the week-48 items.
glimpse(se6)
## Observations: 160
## Variables: 46
## $ ID               <chr> "j1", "j10", "j11", "j12", "j17", "j18", "j19...
## $ Site             <chr> "u1", "u1", "u1", "u1", "u1", "u1", "u1", "u1...
## $ Group            <chr> "p", "t", "p", "t", "t", "t", "t", "p", "t", ...
## $ Sex              <chr> "female", "male", "female", "female", "female...
## $ Fatigue_BL       <dbl> 5, 9, 8, 8, 7, NA, 7, 7, 7, 6, NA, NA, 8, 7, ...
## $ Fatigue_Wk4      <dbl> NA, 9, 8, NA, NA, NA, 9, NA, 7, 8, NA, NA, NA...
## $ Fatigue_Wk8      <dbl> NA, 9, 10, 8, NA, NA, 9, 8, 7, 8, NA, NA, NA,...
## $ Fatigue_Wk12     <dbl> NA, 10, 7, 8, 5, NA, NA, NA, 7, 8, NA, NA, NA...
## $ Fatigue_Wk24     <dbl> NA, 10, NA, 8, NA, NA, NA, 6, 8, 8, NA, NA, N...
## $ Fatigue_Wk48     <dbl> NA, 7, NA, 6, NA, NA, NA, 8, NA, 9, NA, NA, N...
## $ Discomf_BL       <dbl> 5, 9, 10, 8, 8, NA, 7, 6, 6, 6, NA, NA, 9, 9,...
## $ Discomf_Wk4      <dbl> NA, 9, 10, NA, NA, NA, 9, NA, 7, 8, NA, NA, N...
## $ Discomf_Wk8      <dbl> NA, 9, 10, 9, NA, NA, 9, 8, 7, 8, NA, NA, NA,...
## $ Discomf_Wk12     <dbl> NA, 10, 10, 9, 5, NA, NA, NA, 9, 8, NA, NA, N...
## $ Discomf_Wk24     <dbl> NA, 10, NA, 8, NA, NA, NA, 7, 8, 8, NA, NA, N...
## $ Discomf_Wk48     <dbl> NA, 8, NA, 8, NA, NA, NA, 8, NA, 9, NA, NA, N...
## $ Distress_BL      <dbl> 5, 10, 10, 9, 10, NA, 9, 9, 5, 8, NA, NA, 9, ...
## $ Distress_Wk4     <dbl> NA, 9, 10, NA, NA, NA, 10, NA, 9, 9, NA, NA, ...
## $ Distress_Wk8     <dbl> NA, 9, 10, 9, NA, NA, 9, 7, 7, 8, NA, NA, NA,...
## $ Distress_Wk12    <dbl> NA, 10, 8, 9, 6, NA, NA, NA, 9, 9, NA, NA, NA...
## $ Distress_Wk24    <dbl> NA, 10, NA, 8, NA, NA, NA, 8, 9, 9, NA, NA, N...
## $ Distress_Wk48    <dbl> NA, 8, NA, 7, NA, NA, NA, 8, NA, 9, NA, NA, N...
## $ Other_sympt_BL   <dbl> 1, 10, 10, 9, 10, NA, 8, 9, 7, 8, NA, NA, NA,...
## $ Other_sympt_Wk4  <dbl> NA, 9, 10, NA, NA, NA, 10, NA, 9, 9, NA, NA, ...
## $ Other_sympt_Wk8  <dbl> NA, 9, 10, 9, NA, NA, 9, 8, 8, 10, NA, NA, NA...
## $ Other_sympt_Wk12 <dbl> NA, 10, 10, 10, 5, NA, NA, NA, 10, 9, NA, NA,...
## $ Other_sympt_Wk24 <dbl> NA, 10, NA, 9, NA, NA, NA, 8, 9, 9, NA, NA, N...
## $ Other_sympt_Wk48 <dbl> NA, 8, NA, 8, NA, NA, NA, 8, NA, 9, NA, NA, N...
## $ Tasks_BL         <dbl> 5, 10, 10, 10, 9, NA, 10, 8, 7, 8, NA, NA, 9,...
## $ Tasks_Wk4        <dbl> NA, 10, 10, NA, NA, NA, 10, NA, 10, 9, NA, NA...
## $ Tasks_Wk8        <dbl> NA, 10, 10, 10, NA, NA, 10, 8, 9, 10, NA, NA,...
## $ Tasks_Wk12       <dbl> NA, 10, 10, 10, 4, NA, NA, NA, 10, 9, NA, NA,...
## $ Tasks_Wk24       <dbl> NA, 10, NA, 9, NA, NA, NA, 9, 10, 10, NA, NA,...
## $ Tasks_Wk48       <dbl> NA, 8, NA, 9, NA, NA, NA, 8, NA, 9, NA, NA, N...
## $ Non_drug_BL      <dbl> 5, 10, 10, 10, 9, NA, 10, 8, 7, 9, NA, NA, 10...
## $ Non_drug_Wk4     <dbl> NA, 10, 10, NA, NA, NA, 10, NA, 10, 9, NA, NA...
## $ Non_drug_Wk8     <dbl> NA, 10, 10, 10, NA, NA, 10, 8, 9, 10, NA, NA,...
## $ Non_drug_Wk12    <dbl> NA, 10, 10, 10, 5, NA, NA, NA, 10, 9, NA, NA,...
## $ Non_drug_Wk24    <dbl> NA, 10, NA, 10, NA, NA, NA, 9, 10, 10, NA, NA...
## $ Non_drug_Wk48    <dbl> NA, 6, NA, 9, NA, NA, NA, 8, NA, 7, NA, NA, N...
## $ mean_BL          <dbl> 4.333333, 9.666667, 9.666667, 9.000000, 8.833...
## $ mean_Wk4         <dbl> NaN, 8.416667, 9.666667, 7.833333, NaN, NaN, ...
## $ mean_Wk8         <dbl> NaN, 9.333333, 10.000000, 9.166667, NaN, NaN,...
## $ mean_Wk12        <dbl> NaN, 10.000000, 9.166667, 9.333333, 5.000000,...
## $ mean_Wk24        <dbl> NaN, 10.000000, NaN, 8.666667, NaN, NaN, NaN,...
## $ mean_Wk48        <dbl> NaN, 7.500000, NaN, 7.833333, NaN, NaN, NaN, ...
# Reshape from wide to long (one row per participant and question column),
# then derive the domain and the study week from the question label.
se6_tot <- se6 %>%
    tidyr::gather(key = se6_question,
                  value = se6_rating,
                  -ID, -Site, -Group, -Sex) %>%
    mutate(
        # Domain: the part of the label in front of the time point
        Domain = case_when(
            stringr::str_detect(.$se6_question, "Fatigue") ~ "Fatigue",
            stringr::str_detect(.$se6_question, "Discomf") ~ "Discomf",
            stringr::str_detect(.$se6_question, "Distress") ~ "Distress",
            stringr::str_detect(.$se6_question, "Other_sympt") ~ "Other_sympt",
            stringr::str_detect(.$se6_question, "Tasks") ~ "Tasks",
            stringr::str_detect(.$se6_question, "Non_drug") ~ "Non_drug",
            stringr::str_detect(.$se6_question, "mean") ~ "Mean"
        ),
        # Week: 'Wk48' is tested first and 'Wk4' is anchored to the end of
        # the label so that week 48 is never read as week 4
        Week = case_when(
            stringr::str_detect(.$se6_question, "Wk48") ~ 48,
            stringr::str_detect(.$se6_question, "BL") ~ 0,
            stringr::str_detect(.$se6_question, "Wk4$") ~ 4,
            stringr::str_detect(.$se6_question, "Wk8") ~ 8,
            stringr::str_detect(.$se6_question, "Wk12") ~ 12,
            stringr::str_detect(.$se6_question, "Wk24") ~ 24
        )
    )

# List the distinct values of the two derived columns
unique(se6_tot[['Week']])
## [1]  0  4  8 12 24 48
unique(se6_tot[['Domain']])
## [1] "Fatigue"     "Discomf"     "Distress"    "Other_sympt" "Tasks"      
## [6] "Non_drug"    "Mean"
# Drop the se6_question column; Domain and Week now carry its information
se6_tot <- select(se6_tot, -se6_question)

Plots

Spaghetti plots

# Mean score over time, one line per participant, coloured by group
se6_tot %>%
    filter(Domain == 'Mean') %>%
    ggplot(data = .) +
    geom_line(mapping = aes(x = Week,
                            y = se6_rating,
                            group = ID,
                            colour = Group),
              size = 0.4,
              position = position_jitterdodge(dodge.width = 0.2)) +
    labs(title = 'Mean self-efficacy (SE-6)',
         subtitle = 'Coloured by group')

# Mean score over time, one line per participant, coloured by group and
# faceted by site. labs() must be chained with '+': as a separate statement it
# only prints a labels object and the plot is drawn without its title.
se6_tot %>%
    filter(Domain == 'Mean') %>%
    ggplot(data = .) +
    geom_line(mapping = aes(x = Week,
                            y = se6_rating,
                            group = ID,
                            colour = Group),
              size = 0.4,
              position = position_jitterdodge(dodge.width = 0.2)) +
    facet_grid(Site ~ .) +
    labs(title = 'Mean self-efficacy (SE-6)',
         subtitle = 'Coloured by group, faceted by site')
# Mean score over time, one line per participant, coloured by group and
# faceted by site and sex. labs() must be chained with '+': as a separate
# statement it only prints a labels object and the plot is drawn without its
# title.
se6_tot %>%
    filter(Domain == 'Mean') %>%
    ggplot(data = .) +
    geom_line(mapping = aes(x = Week,
                            y = se6_rating,
                            group = ID,
                            colour = Group),
              size = 0.4,
              position = position_jitterdodge(dodge.width = 0.2)) +
    facet_grid(Site ~ Sex) +
    labs(title = 'Mean self-efficacy (SE-6)',
         subtitle = 'Coloured by group, faceted by site and gender')
# Mean score over time, one line per participant, coloured by sex
se6_tot %>%
    filter(Domain == 'Mean') %>%
    ggplot(data = .) +
    geom_line(mapping = aes(x = Week,
                            y = se6_rating,
                            group = ID,
                            colour = Sex),
              size = 0.4,
              position = position_jitterdodge(dodge.width = 0.2)) +
    labs(title = 'Mean self-efficacy (SE-6)',
         subtitle = 'Coloured by gender')

Summary plots

# Distribution of the mean score at each visit, split by group
se6_tot %>%
    filter(Domain == "Mean") %>%
    # Week becomes a factor so that each visit gets its own box
    mutate(Week = as.factor(Week)) %>%
    ggplot(data = .) +
    geom_boxplot(mapping = aes(x = Week,
                               y = se6_rating,
                               colour = Group,
                               fill = Group),
                 alpha = 0.6) +
    labs(title = 'Self-efficacy (SE-6)',
         subtitle = 'Coloured by group')

# Distribution of the mean score at each visit, split by group, one panel
# per study site
se6_tot %>%
    filter(Domain == "Mean") %>%
    # Week becomes a factor so that each visit gets its own box
    mutate(Week = as.factor(Week)) %>%
    ggplot(data = .) +
    geom_boxplot(mapping = aes(x = Week,
                               y = se6_rating,
                               colour = Group,
                               fill = Group),
                 alpha = 0.6) +
    facet_grid(Site ~ .) +
    labs(title = 'Self-efficacy (SE-6)',
         subtitle = 'Faceted by study site, coloured by group')

# Distribution of the mean score at each visit, split by sex, one panel per
# study group
se6_tot %>%
    filter(Domain == "Mean") %>%
    # Week becomes a factor so that each visit gets its own box
    mutate(Week = as.factor(Week)) %>%
    ggplot(data = .) +
    geom_boxplot(mapping = aes(x = Week,
                               y = se6_rating,
                               colour = Sex,
                               fill = Sex),
                 alpha = 0.6) +
    facet_grid(Group ~ .) +
    labs(title = 'Self-efficacy (SE-6)',
         subtitle = 'Faceted by study group, coloured by sex')

# Distribution of the mean score at each visit, split by sex, with a grid of
# panels: study site in rows, study group in columns
se6_tot %>%
    filter(Domain == "Mean") %>%
    # Week becomes a factor so that each visit gets its own box
    mutate(Week = as.factor(Week)) %>%
    ggplot(data = .) +
    geom_boxplot(mapping = aes(x = Week,
                               y = se6_rating,
                               colour = Sex,
                               fill = Sex),
                 alpha = 0.6) +
    facet_grid(Site ~ Group) +
    labs(title = 'Self-efficacy (SE-6)',
         subtitle = 'Faceted by study site and group, coloured by sex')

# Distribution of the mean score at each visit, female participants only
se6_tot %>%
    filter(Domain == "Mean",
           Sex == "female") %>%
    # Week becomes a factor so that each visit gets its own box
    mutate(Week = as.factor(Week)) %>%
    ggplot(data = .) +
    geom_boxplot(mapping = aes(x = Week,
                               y = se6_rating),
                 alpha = 0.6) +
    labs(title = 'Self-efficacy (SE-6)',
         subtitle = 'Women only')

# Distribution of the mean score at each visit, female participants only,
# leaving out site 'u1'
se6_tot %>%
    filter(Domain == "Mean",
           Sex == "female",
           Site != 'u1') %>%
    # Week becomes a factor so that each visit gets its own box
    mutate(Week = as.factor(Week)) %>%
    ggplot(data = .) +
    geom_boxplot(mapping = aes(x = Week,
                               y = se6_rating),
                 alpha = 0.6) +
    labs(title = 'Self-efficacy (SE-6)',
         subtitle = 'Women only (excluding U1)')

# Ratings per domain at each visit.
# The computed mean is stored under Domain == 'Mean' (capital M); comparing
# against 'mean' removes nothing and plots the mean as if it were a domain.
se6_tot %>%
    filter(Domain != 'Mean') %>%
    ggplot(data = .) +
    geom_boxplot(mapping = aes(x = as.factor(Week),
                               y = se6_rating,
                               colour = Domain,
                               fill = Domain),
                 alpha = 0.6) +
    labs(title = 'Self-efficacy domains',
         subtitle = 'Coloured by domain')

# Ratings per domain at each visit, one panel per domain.
# The computed mean is stored under Domain == 'Mean' (capital M); comparing
# against 'mean' removes nothing and adds a 'Mean' panel to the domain plot.
se6_tot %>%
    filter(Domain != 'Mean') %>%
    ggplot(data = .) +
    geom_boxplot(mapping = aes(x = as.factor(Week),
                               y = se6_rating,
                               colour = Domain,
                               fill = Domain),
                 alpha = 0.6) +
    facet_grid(Domain ~ .) +
    labs(title = 'Self-efficacy domains',
         subtitle = 'Faceted by domain')

# Ratings per domain at each visit, split by group, one panel per domain.
# The computed mean is stored under Domain == 'Mean' (capital M); comparing
# against 'mean' removes nothing and adds a 'Mean' panel to the domain plot.
se6_tot %>%
    filter(Domain != 'Mean') %>%
    ggplot(data = .) +
    geom_boxplot(mapping = aes(x = as.factor(Week),
                               y = se6_rating,
                               colour = Group,
                               fill = Group),
                 alpha = 0.6) +
    facet_grid(Domain ~ .) +
    labs(title = 'Self-efficacy domains',
         subtitle = 'Faceted by domain, coloured by group')

# Ratings per domain at each visit, split by site, one panel per domain.
# The computed mean is stored under Domain == 'Mean' (capital M); comparing
# against 'mean' removes nothing and adds a 'Mean' panel to the domain plot.
se6_tot %>%
    filter(Domain != 'Mean') %>%
    ggplot(data = .) +
    geom_boxplot(mapping = aes(x = as.factor(Week),
                               y = se6_rating,
                               colour = Site,
                               fill = Site),
                 alpha = 0.6) +
    facet_grid(Domain ~ .) +
    labs(title = 'Self-efficacy domains',
         subtitle = 'Faceted by domain, coloured by site')

Session information

# Record the R version and package versions used for this run
utils::sessionInfo()
## R version 3.4.2 (2017-09-28)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Sierra 10.12.6
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] bindrcpp_0.2  stringr_1.2.0 ggplot2_2.2.1 dplyr_0.7.4  
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.13     knitr_1.17       bindr_0.1        magrittr_1.5    
##  [5] hms_0.3          tidyselect_0.2.2 munsell_0.4.3    colorspace_1.3-2
##  [9] R6_2.2.2         rlang_0.1.2      plyr_1.8.4       tools_3.4.2     
## [13] grid_3.4.2       gtable_0.2.0     htmltools_0.3.6  lazyeval_0.2.0  
## [17] yaml_2.1.14      assertthat_0.2.0 rprojroot_1.2    digest_0.6.12   
## [21] tibble_1.3.4     reshape2_1.4.2   purrr_0.2.4      tidyr_0.7.2     
## [25] readr_1.1.1      glue_1.1.1       evaluate_0.10.1  rmarkdown_1.6   
## [29] labeling_0.3     stringi_1.1.5    compiler_3.4.2   scales_0.5.0    
## [33] backports_1.1.1  jsonlite_1.5     pkgconfig_2.0.1